package com.saturn.titan.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts website certification identifiers from page text:
 * ICP filing numbers, ICP license numbers, and public-security filing numbers.
 *
 * <p>Every finder returns the first match found in the input, or an empty
 * string when the input is {@code null} or contains no match.
 *
 * @author lwh
 * @date 2014-04-22
 */
public class SiteCertUtil {

	/**
	 * Character class for the single region-abbreviation character that starts
	 * every identifier. No {@code |} separators: inside a character class a
	 * {@code |} is a literal and would itself be accepted as a region.
	 */
	private static final String REGION = "[京津沪渝蒙新藏宁桂黑吉辽晋冀青鲁豫苏皖浙闽赣湘鄂粤琼甘陕黔滇川]";

	/**
	 * Type tag that follows the region: either 2-3 characters from
	 * {@code [1-9a-zA-Z]} (e.g. "ICP", "B2"), or exactly three characters
	 * outside the 0x00-0xff range (e.g. a full-width "ＩＣＰ").
	 */
	private static final String TYPE_TAG = "([1-9a-zA-Z]{2,3}|[^\\x00-\\xff]{3})";

	/** ICP filing number: region, type tag, "备", 8 digits, "号", optional "-N" suffix. */
	private static final Pattern ICP_BACK_PATTERN = Pattern.compile(
			REGION + "\\s*" + TYPE_TAG + "\\s*备\\s*\\d{8}\\s*号(\\s*-\\s*\\d+)?");

	/** ICP license number: region, type tag, "证" or "-", 6-8 digits, optional "号", optional "-N" suffix. */
	private static final Pattern ICP_LICENSE_PATTERN = Pattern.compile(
			REGION + "\\s*" + TYPE_TAG + "\\s*[证-]\\s*\\d{6,8}(\\s*号)?(\\s*-\\s*\\d+)?");

	/** Public-security filing number: region, "公网安备", 12-14 digits, optional "号". */
	private static final Pattern POLICE_LICENSE_PATTERN = Pattern.compile(
			REGION + "公网安备\\s*\\d{12,14}(\\s*号)?");

	/**
	 * Finds the first ICP filing number in the given text.
	 *
	 * @param html text to search; may be {@code null}
	 * @return the matched filing number, or an empty string if none is found
	 */
	public static String findICPBack(String html) {
		return findByPattern(html, ICP_BACK_PATTERN);
	}

	/**
	 * Finds the first ICP license number in the given text.
	 *
	 * @param html text to search; may be {@code null}
	 * @return the matched license number, or an empty string if none is found
	 */
	public static String findICPLicense(String html) {
		return findByPattern(html, ICP_LICENSE_PATTERN);
	}

	/**
	 * Finds the first public-security filing number in the given text.
	 *
	 * @param html text to search; may be {@code null}
	 * @return the matched filing number, or an empty string if none is found
	 */
	public static String findPoliceBack(String html) {
		return findByPattern(html, POLICE_LICENSE_PATTERN);
	}

	/**
	 * Returns the first substring of {@code html} matched by {@code pattern}.
	 *
	 * @param html text to search; may be {@code null}
	 * @param pattern precompiled pattern to search with
	 * @return the whole first match, or an empty string for {@code null} input or no match
	 */
	private static String findByPattern(String html, Pattern pattern) {
		// Treat missing input the same as "no match" instead of throwing.
		if (html == null) {
			return "";
		}

		Matcher m = pattern.matcher(html);
		return m.find() ? m.group() : "";
	}

}
